from lxml import etree
import requests
import pymysql

if __name__ == '__main__':
    # Crawl jianzhimao.com: for every city, store (1) the city itself,
    # (2) a summary row per job listing, (3) the job's detail page, into
    # three MySQL tables. One connection is opened up front and reused —
    # the original opened a fresh connection per row and leaked most of them.
    db = pymysql.connect(host='localhost', user='root', password='123456', database='py_sql_test1')
    cursor = db.cursor()

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
    }

    def fetch_tree(url):
        # Download one page and return its parsed lxml element tree.
        page = requests.get(url=url, headers=headers).text
        return etree.HTML(page)

    def save(sql, params):
        # Run one parameterized INSERT; commit on success, roll back and
        # report on failure so a single bad row does not abort the crawl.
        try:
            cursor.execute(sql, params)
            db.commit()
        except Exception as ex:
            print(ex)
            db.rollback()

    def joined_text(tree, xpath_expr):
        # Concatenate all non-empty, stripped text fragments the XPath matches
        # (replaces the original quadratic `list += item` loops).
        return "".join(t.strip() for t in tree.xpath(xpath_expr) if t.strip())

    try:
        city_tree = fetch_tree("https://www.jianzhimao.com/ctrlcity/changeCity.html")
        city_names = city_tree.xpath('//ul[@class="city_table"]//a[@target="_blank"]/text()')
        city_urls = city_tree.xpath('//ul[@class="city_table"]//a/@href')

        for city_name, city_url in zip(city_names, city_urls):
            # Record the city. Parameterized query — the original built SQL
            # with % formatting, which breaks on quotes and invites injection.
            save("insert into 兼职猫城市名字(城市,网址) values(%s,%s)",
                 (city_name, city_url))

            base = city_url.rstrip("/")
            page_links = fetch_tree(city_url).xpath('//div[@class="content_page_box"]//a/@href')

            for page_link in page_links:
                # One paginated listing page for this city.
                list_tree = fetch_tree(base + page_link)
                job_hrefs = list_tree.xpath('//ul[@class="content_list_wrap"]//li//a/@href')
                # Mixed document-order list of hrefs / link text / span titles /
                # dates; indices [1..4] below feed the summary columns, exactly
                # as the original did. Pad so the indexing can never IndexError.
                summary_fields = list_tree.xpath(
                    '//ul[@class="content_list_wrap"]//li//a/@href | '
                    '//ul[@class="content_list_wrap"]//li//a//text()|'
                    '//ul[@class="content_list_wrap"]//li/div[1]//span/@title|'
                    '//ul[@class="content_list_wrap"]//li/div[2]//span/@title|'
                    '//ul[@class="content_list_wrap"]//li//div[@class="left date"]/@title')
                summary_fields += [""] * max(0, 5 - len(summary_fields))

                for job_href in job_hrefs:
                    # BUG FIX: original prefixed every job with
                    # title_list_url[0] — the FIRST city's base — giving wrong
                    # URLs for all later cities. Use this city's base.
                    job_url = base + job_href
                    detail_tree = fetch_tree(job_url)

                    save("insert into 兼职粗况(城市,网址,招聘名字,区域,访问人数,发布时间)"
                         " values(%s,%s,%s,%s,%s,%s)",
                         (city_name, job_url,
                          str(summary_fields[1]), str(summary_fields[2]),
                          str(summary_fields[3]), str(summary_fields[4])))

                    # NOTE: original XPaths started with '///div' — invalid
                    # XPath syntax; corrected to '//div'.
                    wages = [t.strip() for t in detail_tree.xpath(
                        '//div[@class="job_base"]//span[@class="job_price"]/text()')]
                    kinds = [t.strip() for t in detail_tree.xpath(
                        '//div[@class="job_base"]//a[@class="job_type"]/text()')]
                    info = joined_text(detail_tree,
                                       '//div[@class="job_content"]/ul[@class="job_list"]//text()')
                    details = joined_text(detail_tree, '//div[@class="box"]//text()')
                    company = joined_text(detail_tree, '//div[@class="company_info"]//text()')

                    # BUG FIX: original looped len(job_url) times here,
                    # inserting the identical detail row dozens of times and
                    # opening a new DB connection each pass. Insert once, and
                    # guard the first-element lookups (original raised
                    # NameError when the price/type lists were empty).
                    save("insert into 具体兼职内容(具体工作内容网址,工资,职业,招聘信息,工作详细,公司介绍)"
                         " values(%s,%s,%s,%s,%s,%s)",
                         (job_url,
                          wages[0] if wages else "",
                          kinds[0] if kinds else "",
                          info, details, company))
    finally:
        cursor.close()
        db.close()